Now we’ll go back to CI creation in the normal fashion. We’ll take the ames data from the CIs lab earlier today and regard it now as a sample, we won’t be drawing any smaller samples from within it. This is the usual situation in an analysis: you use all the data available to you!
Task 1.
Load the data again, clean_names(), and re-familiarise yourself with it
Ans 1.
Load libraries:
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages --------------------------------------------------------------------------------------- tidyverse 1.3.1 --
√ ggplot2 3.3.5 √ purrr 0.3.4
√ tibble 3.1.6 √ dplyr 1.0.8
√ tidyr 1.2.0 √ stringr 1.4.0
√ readr 2.1.2 √ forcats 0.5.1
-- Conflicts ------------------------------------------------------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(infer)
Warning: package ‘infer’ was built under R version 4.1.3
library(janitor)
Attaching package: ‘janitor’
The following objects are masked from ‘package:stats’:
chisq.test, fisher.test
Load and explore the data held in ames.csv
ames <- read_csv("data/ames.csv") %>%
clean_names()
Rows: 2930 Columns: 82
-- Column specification --------------------------------------------------------------------------------------------------------
Delimiter: ","
chr (43): MS.Zoning, Street, Alley, Lot.Shape, Land.Contour, Utilities, Lot.Config, Land.Slope, Neighborhood, Condition.1, C...
dbl (39): Order, PID, MS.SubClass, Lot.Frontage, Lot.Area, Overall.Qual, Overall.Cond, Year.Built, Year.Remod.Add, Mas.Vnr.A...
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(ames)
Rows: 2,930
Columns: 82
$ order <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,~
$ pid <dbl> 526301100, 526350040, 526351010, 526353030, 527105010, 527105030, 527127150, 527145080, 527146030, 527~
$ ms_sub_class <dbl> 20, 20, 20, 20, 60, 60, 120, 120, 120, 60, 60, 20, 60, 20, 120, 60, 50, 20, 20, 20, 20, 85, 60, 20, 20~
$ ms_zoning <chr> "RL", "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", ~
$ lot_frontage <dbl> 141, 80, 81, 93, 74, 78, 41, 43, 39, 60, 75, NA, 63, 85, NA, 47, 152, 88, 140, 85, 105, 85, NA, NA, NA~
$ lot_area <dbl> 31770, 11622, 14267, 11160, 13830, 9978, 4920, 5005, 5389, 7500, 10000, 7980, 8402, 10176, 6820, 53504~
$ street <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave"~
$ alley <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
$ lot_shape <chr> "IR1", "Reg", "IR1", "Reg", "IR1", "IR1", "Reg", "IR1", "IR1", "Reg", "IR1", "IR1", "IR1", "Reg", "IR1~
$ land_contour <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "HLS", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl~
$ utilities <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "A~
$ lot_config <chr> "Corner", "Inside", "Corner", "Corner", "Inside", "Inside", "Inside", "Inside", "Inside", "Inside", "C~
$ land_slope <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl~
$ neighborhood <chr> "NAmes", "NAmes", "NAmes", "NAmes", "Gilbert", "Gilbert", "StoneBr", "StoneBr", "StoneBr", "Gilbert", ~
$ condition_1 <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm~
$ condition_2 <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm"~
$ bldg_type <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "TwnhsE", "TwnhsE", "TwnhsE", "1Fam", "1Fam", "1Fam", ~
$ house_style <chr> "1Story", "1Story", "1Story", "1Story", "2Story", "2Story", "1Story", "1Story", "1Story", "2Story", "2~
$ overall_qual <dbl> 6, 5, 6, 7, 5, 6, 8, 8, 8, 7, 6, 6, 6, 7, 8, 8, 8, 9, 4, 6, 6, 7, 7, 6, 5, 5, 4, 4, 7, 6, 5, 6, 6, 6, ~
$ overall_cond <dbl> 5, 6, 6, 5, 5, 6, 5, 5, 5, 5, 5, 7, 5, 5, 5, 5, 7, 2, 5, 6, 6, 6, 5, 7, 6, 6, 5, 5, 5, 5, 5, 5, 5, 6, ~
$ year_built <dbl> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995, 1999, 1993, 1992, 1998, 1990, 1985, 2003, 1988, ~
$ year_remod_add <dbl> 1960, 1961, 1958, 1968, 1998, 1998, 2001, 1992, 1996, 1999, 1994, 2007, 1998, 1990, 1985, 2003, 2005, ~
$ roof_style <chr> "Hip", "Gable", "Hip", "Hip", "Gable", "Gable", "Gable", "Gable", "Gable", "Gable", "Gable", "Gable", ~
$ roof_matl <chr> "CompShg", "CompShg", "CompShg", "CompShg", "CompShg", "CompShg", "CompShg", "CompShg", "CompShg", "Co~
$ exterior_1st <chr> "BrkFace", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd", "VinylSd", "CemntBd", "HdBoard", "CemntBd", "Vi~
$ exterior_2nd <chr> "Plywood", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd", "VinylSd", "CmentBd", "HdBoard", "CmentBd", "Vi~
$ mas_vnr_type <chr> "Stone", "None", "BrkFace", "None", "None", "BrkFace", "None", "None", "None", "None", "None", "None",~
$ mas_vnr_area <dbl> 112, 0, 108, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 603, 0, 350, 0, 119, 480, 81, 0, 180, 0, 0, 0, 0, 0,~
$ exter_qual <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "Gd", "Gd", "Gd", "TA", "TA", "TA", "TA", "TA", "Gd", "Ex", "Gd", ~
$ exter_cond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "Gd", "TA", "TA", "TA", "TA", "TA", ~
$ foundation <chr> "CBlock", "CBlock", "CBlock", "CBlock", "PConc", "PConc", "PConc", "PConc", "PConc", "PConc", "PConc",~
$ bsmt_qual <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", "TA", "Gd", "Gd", "Gd", "Gd", "Gd", "Gd", "Gd", ~
$ bsmt_cond <chr> "Gd", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", ~
$ bsmt_exposure <chr> "Gd", "No", "No", "No", "No", "No", "Mn", "No", "No", "No", "No", "No", "No", "Gd", "Av", "Gd", "Av", ~
$ bsmt_fin_type_1 <chr> "BLQ", "Rec", "ALQ", "ALQ", "GLQ", "GLQ", "GLQ", "ALQ", "GLQ", "Unf", "Unf", "ALQ", "Unf", "GLQ", "GLQ~
$ bsmt_fin_sf_1 <dbl> 639, 468, 923, 1065, 791, 602, 616, 263, 1180, 0, 0, 935, 0, 637, 368, 1416, 427, 1445, 120, 790, 705,~
$ bsmt_fin_type_2 <chr> "Unf", "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "BLQ~
$ bsmt_fin_sf_2 <dbl> 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1120, 0, 0, 0, 0, 163, 0, 168, 0, 0, 0, 0, 78, 0, 0, 0, 0,~
$ bsmt_unf_sf <dbl> 441, 270, 406, 1045, 137, 324, 722, 1017, 415, 994, 763, 233, 789, 663, 0, 234, 132, 411, 744, 589, 11~
$ total_bsmt_sf <dbl> 1080, 882, 1329, 2110, 928, 926, 1338, 1280, 1595, 994, 763, 1168, 789, 1300, 1488, 1650, 559, 1856, 8~
$ heating <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA"~
$ heating_qc <chr> "Fa", "TA", "TA", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", "Gd", "Gd", "Ex", "Gd", "Gd", "TA", "Ex", "Gd", ~
$ central_air <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y~
$ electrical <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SB~
$ x1st_flr_sf <dbl> 1656, 896, 1329, 2110, 928, 926, 1338, 1280, 1616, 1028, 763, 1187, 789, 1341, 1502, 1690, 1080, 1856,~
$ x2nd_flr_sf <dbl> 0, 0, 0, 0, 701, 678, 0, 0, 0, 776, 892, 0, 676, 0, 0, 1589, 672, 0, 0, 0, 0, 0, 860, 0, 0, 0, 0, 0, 0~
$ low_qual_fin_sf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
$ gr_liv_area <dbl> 1656, 896, 1329, 2110, 1629, 1604, 1338, 1280, 1616, 1804, 1655, 1187, 1465, 1341, 1502, 3279, 1752, 1~
$ bsmt_full_bath <dbl> 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, ~
$ bsmt_half_bath <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
$ full_bath <dbl> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 2, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, ~
$ half_bath <dbl> 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, ~
$ bedroom_abv_gr <dbl> 3, 2, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 2, 1, 4, 4, 1, 2, 3, 3, 3, 3, 2, 3, 3, 2, 3, 2, 2, 3, 3, 2, 3, ~
$ kitchen_abv_gr <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
$ kitchen_qual <chr> "TA", "TA", "Gd", "Ex", "TA", "Gd", "Gd", "Gd", "Gd", "Gd", "TA", "TA", "TA", "Gd", "Gd", "Ex", "TA", ~
$ tot_rms_abv_grd <dbl> 7, 5, 6, 8, 6, 7, 6, 5, 5, 7, 7, 6, 7, 5, 4, 12, 8, 8, 4, 7, 7, 6, 7, 5, 6, 6, 4, 5, 5, 5, 6, 6, 4, 6,~
$ functional <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ~
$ fireplaces <dbl> 2, 0, 0, 2, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 2, 1, 2, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, ~
$ fireplace_qu <chr> "Gd", NA, NA, "TA", "TA", "Gd", NA, NA, "TA", "TA", "TA", NA, "Gd", "Po", NA, "Gd", NA, "Ex", NA, "TA"~
$ garage_type <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "A~
$ garage_yr_blt <dbl> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995, 1999, 1993, 1992, 1998, 1990, 1985, 2003, 1988, ~
$ garage_finish <chr> "Fin", "Unf", "Unf", "Fin", "Fin", "Fin", "Fin", "RFn", "RFn", "Fin", "Fin", "Fin", "Fin", "Unf", "RFn~
$ garage_cars <dbl> 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 2, 2, 2, 2, 2, 2, 2, 1, 2, 0, 2, 1, 1, 1, 2, 2, ~
$ garage_area <dbl> 528, 730, 312, 522, 482, 470, 582, 506, 608, 442, 440, 420, 393, 506, 528, 841, 492, 834, 400, 500, 54~
$ garage_qual <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", ~
$ garage_cond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", ~
$ paved_drive <chr> "P", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y~
$ wood_deck_sf <dbl> 210, 140, 393, 0, 212, 360, 0, 0, 237, 140, 157, 483, 0, 192, 0, 503, 325, 113, 0, 349, 0, 0, 0, 0, 0,~
$ open_porch_sf <dbl> 62, 0, 36, 0, 34, 36, 0, 82, 152, 60, 84, 21, 75, 0, 54, 36, 12, 0, 0, 0, 122, 120, 96, 0, 0, 85, 0, 0~
$ enclosed_porch <dbl> 0, 0, 0, 0, 0, 0, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184, 0, 0, 0, 0, 0, 0, 0,~
$ x3ssn_porch <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
$ screen_porch <dbl> 0, 120, 0, 0, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 140, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ pool_area <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
$ pool_qc <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
$ fence <chr> NA, "MnPrv", NA, NA, "MnPrv", NA, NA, NA, NA, NA, NA, "GdPrv", NA, NA, NA, NA, NA, NA, NA, "MnPrv", "M~
$ misc_feature <chr> NA, NA, "Gar2", NA, NA, NA, NA, NA, NA, NA, NA, "Shed", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "S~
$ misc_val <dbl> 0, 0, 12500, 0, 0, 0, 0, 0, 0, 0, 0, 500, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 700, 0, 0, 0, 0, 0, 0, 0, 0~
$ mo_sold <dbl> 5, 6, 6, 4, 3, 6, 4, 1, 3, 6, 4, 3, 5, 2, 6, 6, 6, 6, 6, 2, 1, 1, 1, 3, 4, 7, 4, 4, 6, 2, 3, 3, 7, 6, ~
$ yr_sold <dbl> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, ~
$ sale_type <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", ~
$ sale_condition <chr> "Normal", "Normal", "Normal", "Normal", "Normal", "Normal", "Normal", "Normal", "Normal", "Normal", "N~
$ sale_price <dbl> 215000, 105000, 172000, 244000, 189900, 195500, 213500, 191500, 236500, 189000, 175900, 185000, 180400~
Check data for NA values
ames %>%
summarise(across(.cols = everything(),
.fns = ~sum(is.na(.x))))
NA
Task 2.
Investigate the distribution of lot_area. Is the distribution roughly normal? If not, what problems do you find?
Ans 2.
ames %>%
ggplot(aes(x = lot_area)) +
geom_histogram(col = "white", fill = "steel blue", alpha = 0.7, binwidth = 2000)
The distribution is not normal. There’s a significant right-skew.
Task 3.
Compute and visualise a bootstrap sampling distribution for the mean(lot_area) of the sold houses.
Ans 3.
bootstrap_sample <- ames %>%
rep_sample_n(size = nrow(ames), replace = TRUE, reps = 1000) %>%
summarise(
mean_lot_area = mean(lot_area)
)
bootstrap_sample
bootstrap_sample %>%
ggplot(aes(x = mean_lot_area)) +
geom_histogram(col = "white", fill = "steel blue", alpha = 0.7)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary_sample <- bootstrap_sample %>%
summarise(mean_lot_area = mean(mean_lot_area))
summary_sample
Task 4.
Use your bootstrap distribution to calculate a \(95\%\) CI for mean(lot_area), and visualise it on the distribution
Ans 4.
# specify that we want to look at tenure variable, 1000 reps, and want to calculate the mean
infer_resample <- bootstrap_sample %>%
specify(response = mean_lot_area) %>%
generate(reps = 1000, type = "bootstrap") %>%
calculate(stat = "mean")
infer_resample
Response: mean_lot_area (numeric)
infer_ci_95 <- infer_resample %>%
get_confidence_interval(level = 0.95, type = "percentile")
infer_ci_95
infer_resample %>%
visualise(bin = 20) +
shade_confidence_interval(endpoints = infer_ci_95)
Task 5.
You would like to know the mean(lot_area) of the sold houses with higher confidence. Calculate the \(99\%\) CI for this variable (you can re-use your bootstrap distribution from above). Is it narrower or broader than the \(95\%\) CI? Does that make sense?
Ans 5.
infer_ci_99 <- infer_resample %>%
get_confidence_interval(level = 0.99, type = "percentile")
infer_ci_99
The \(99\%\) CI is wider than the \(95\%\) CI. The bounds have moved away from the mean lot area.
Task 6.
Calculate the point estimate of the mean(lot_area)
Ans 6.
sample_200 <- ames %>%
rep_sample_n(size = 200, reps = 1)
sample_200
summary_sample_200 <- sample_200 %>%
ungroup() %>%
summarise(mean_lot_area = mean(lot_area))
summary_sample_200
Task 1.
Calculate a point estimate and \(95\%\) CI for the proportion of houses in the data built before 1920. Does the number of reps you use matter? [Investigate reps from \(200\) up to \(50000\), memory of your laptop permitting].
Ans.
sample_year_built <- ames %>%
filter(year_built < 1920) %>%
rep_sample_n(size = 100, reps = 1)
sample_year_built
summary_sample_year_built <- sample_year_built %>%
ungroup() %>%
summarise(
mean_lot_area = mean(lot_area),
lower_bound = quantile(lot_area, probs = 0.025),
upper_bound = quantile(lot_area, probs = 0.975),
ci_width = upper_bound - lower_bound
)
summary_sample_year_built
sample_year_built <- ames %>%
filter(year_built < 1920) %>%
rep_sample_n(size = 100, reps = 200)
sample_year_built
summary_sample_year_built <- sample_year_built %>%
ungroup() %>%
summarise(
mean_lot_area = mean(lot_area),
lower_bound = quantile(lot_area, probs = 0.025),
upper_bound = quantile(lot_area, probs = 0.975),
ci_width = upper_bound - lower_bound
)
summary_sample_year_built
sample_year_built <- ames %>%
filter(year_built < 1920) %>%
rep_sample_n(size = 100, reps = 1000)
sample_year_built
summary_sample_year_built <- sample_year_built %>%
ungroup() %>%
summarise(
mean_lot_area = mean(lot_area),
lower_bound = quantile(lot_area, probs = 0.025),
upper_bound = quantile(lot_area, probs = 0.975),
ci_width = upper_bound - lower_bound
)
summary_sample_year_built
sample_year_built <- ames %>%
filter(year_built < 1920) %>%
rep_sample_n(size = 100, reps = 2000)
sample_year_built
summary_sample_year_built <- sample_year_built %>%
ungroup() %>%
summarise(
mean_lot_area = mean(lot_area),
lower_bound = quantile(lot_area, probs = 0.025),
upper_bound = quantile(lot_area, probs = 0.975),
ci_width = upper_bound - lower_bound
)
summary_sample_year_built
sample_year_built <- ames %>%
filter(year_built < 1920) %>%
rep_sample_n(size = 100, reps = 5000)
sample_year_built
summary_sample_year_built <- sample_year_built %>%
ungroup() %>%
summarise(
mean_lot_area = mean(lot_area),
lower_bound = quantile(lot_area, probs = 0.025),
upper_bound = quantile(lot_area, probs = 0.975),
ci_width = upper_bound - lower_bound
)
summary_sample_year_built
sample_year_built <- ames %>%
filter(year_built < 1920) %>%
rep_sample_n(size = 100, reps = 10000)
sample_year_built
summary_sample_year_built <- sample_year_built %>%
ungroup() %>%
summarise(
mean_lot_area = mean(lot_area),
lower_bound = quantile(lot_area, probs = 0.025),
upper_bound = quantile(lot_area, probs = 0.975),
ci_width = upper_bound - lower_bound
)
summary_sample_year_built
sample_year_built <- ames %>%
filter(year_built < 1920) %>%
rep_sample_n(size = 100, reps = 50000)
sample_year_built
summary_sample_year_built <- sample_year_built %>%
ungroup() %>%
summarise(
mean_lot_area = mean(lot_area),
lower_bound = quantile(lot_area, probs = 0.025),
upper_bound = quantile(lot_area, probs = 0.975),
ci_width = upper_bound - lower_bound
)
summary_sample_year_built
For more than 200 repetitions, there’s very little change in the mean_lot_area calculation and no change observed in the CI calculation.